package com.mayy.spark.demo;

import org.apache.spark.api.java.function.FilterFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.SparkSession;

/**
 * Minimal Spark application that counts how many lines of a text file contain
 * the letter "a" and how many contain the letter "b".
 *
 * <p>The file to scan is the first command-line argument when one is given;
 * otherwise it defaults to {@code README.md} under the directory named by the
 * {@code SPARK_HOME} environment variable.
 */
public class SimpleApp {

    /**
     * Runs the line-count job and prints the two counts to standard output.
     *
     * @param args optional; {@code args[0]}, when present, is the path of the
     *             text file to scan
     * @throws IllegalStateException if no path argument is given and
     *                               {@code SPARK_HOME} is unset or empty
     */
    public static void main(String[] args) {
        // Resolve the input before starting Spark so a bad configuration fails fast.
        String logFile = resolveLogFile(args);

        // The master is hard-coded to local mode. Drop the master(...) call when
        // the master should be supplied by the launcher instead.
        SparkSession spark = SparkSession.builder()
                .appName("Simple Application")
                .master("local[*]")
                .getOrCreate();
        try {
            // Cached because the same dataset is scanned twice below.
            Dataset<String> logData = spark.read().textFile(logFile).cache();

            long numAs = logData.filter((FilterFunction<String>) value -> value.contains("a")).count();
            long numBs = logData.filter((FilterFunction<String>) value -> value.contains("b")).count();

            System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);
        } finally {
            // Always stop the session, even when reading or counting fails.
            spark.stop();
        }
    }

    /**
     * Picks the file to scan: {@code args[0]} if present, else
     * {@code $SPARK_HOME/README.md}.
     */
    private static String resolveLogFile(String[] args) {
        if (args != null && args.length > 0) {
            return args[0];
        }
        String sparkHome = System.getenv("SPARK_HOME");
        if (sparkHome == null || sparkHome.isEmpty()) {
            // Without this check the path would silently become "null/README.md".
            throw new IllegalStateException(
                    "No input file given and SPARK_HOME is not set; "
                            + "pass a file path as the first argument or set SPARK_HOME");
        }
        return sparkHome + "/README.md";
    }

}
